Dimensional Model Scripts
01 Table Creation
CREATE TABLE IF NOT EXISTS tr_wi_2023.zip_county_wda_xwalk
(
zip VARCHAR(5) NOT NULL ENCODE lzo
,county VARCHAR(11) ENCODE lzo
,wda SMALLINT ENCODE az64
,PRIMARY KEY (zip)
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.zip_county_wda_xwalk owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.zip_county_wda_xwalk TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group db_t00111_ro;
------------------------
CREATE TABLE IF NOT EXISTS tr_wi_2023.wi_rdim_week
(
week_id SMALLINT NOT NULL ENCODE az64
,week_code DATE NOT NULL ENCODE az64
,quarter_code CHAR(6) ENCODE lzo
,calendar_year SMALLINT ENCODE az64
,calendar_quarter SMALLINT ENCODE az64
,PRIMARY KEY (week_id)
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_rdim_week owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_week TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.wi_rdim_week TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_week TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_week TO group db_t00111_ro;
-----
CREATE TABLE IF NOT exists tr_wi_2023.wi_rdim_industry
(
code VARCHAR(18) ENCODE lzo
,title VARCHAR(357) ENCODE lzo
,description VARCHAR(24000) ENCODE lzo
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_rdim_industry owner to dbadmin11;
-- Permissions
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_industry TO dbadmin11;
GRANT INSERT, SELECT, UPDATE, DELETE ON TABLE tr_wi_2023.wi_rdim_industry TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_industry TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_industry TO group db_t00111_ro;
-----------------------
CREATE TABLE IF NOT exists tr_wi_2023.wi_rdim_occupation
(
SOC_code VARCHAR(18) ENCODE lzo
,SOC_title VARCHAR(357) ENCODE lzo
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_rdim_occupation owner to dbadmin11;
-- Permissions
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_occupation TO dbadmin11;
GRANT INSERT, SELECT, UPDATE, DELETE ON TABLE tr_wi_2023.wi_rdim_occupation TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_occupation TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_occupation TO group db_t00111_ro;
------------------------
CREATE TABLE IF NOT exists tr_wi_2023.wi_mdim_person
(
ssn VARCHAR(64) ENCODE lzo
,person_id int identity not null
,gender INT ENCODE az64
,race VARCHAR(3) ENCODE lzo
,ethnicity VARCHAR(3) ENCODE lzo
,birth_date DATE ENCODE az64
,primary key(person_id)
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_mdim_person owner to dbadmin11;
-- Permissions
GRANT ALL ON TABLE tr_wi_2023.wi_mdim_person TO dbadmin11;
GRANT INSERT, SELECT, UPDATE, DELETE ON TABLE tr_wi_2023.wi_mdim_person TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_mdim_person TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_mdim_person TO group db_t00111_ro;
------------------------
CREATE TABLE IF NOT EXISTS tr_wi_2023.wi_fact_weekly_observation
(
person_id int not null
,week_ending_date date
,week_id smallint NOT NULL
,benefit_claimed varchar(1) not null
,benefit_yr_start date
,normal_benefit_received varchar(1) not null
,ic_claim_date date
,last_employer_naics varchar(6)
,last_employer int
,eligible_benefit_amount int
,earnings_during_wk varchar(3)
,entitlement int
,veteran_status int4
,commuter varchar(3)
,education varchar(3)
,disability varchar(3)
,ic_type varchar(3)
,occupation varchar
,program_type varchar(1)
,status_code varchar(3)
,stop_payment_indicator varchar(1)
,waiting_week varchar(1)
,res_zip varchar(5)
,employed_in_quarter varchar(1) not null
,calendar_year smallint
,calendar_quarter smallint
,primary_employer_id varchar(10)
,primary_employer_wages int
,total_wages int
,employer_count SMALLINT
,PRIMARY KEY (person_id, week_id)
)
DISTSTYLE auto
;
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.wi_fact_weekly_observation TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group db_t00111_ro;
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (person_id)
REFERENCES tr_wi_2023.wi_mdim_person(person_id)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (week_id)
REFERENCES tr_wi_2023.wi_rdim_week(week_id)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (res_zip)
REFERENCES tr_wi_2023.wi_rdim_zip_county_wda_xwalk(zip)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (occupation)
REFERENCES tr_wi_2023.wi_rdim_occupation(soc_code)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (last_employer_naics)
REFERENCES tr_wi_2023.wi_rdim_industry(code)02 Linkage Reference Dimension Load Scripts
--populate week table
CREATE OR REPLACE PROCEDURE populating_wi_rdim_week() --need TO CREATE PROCEDURE
LANGUAGE plpgsql
as $$
declare
StartDate DATE := '2006-01-07'; -- setting START date
EndDate DATE := '2023-12-30'; -- setting END date
Date DATE := StartDate; -- setting the variable date TO increate WITH EACH loop
ID smallint :=1; -- setting id variable
begin
while Date <= EndDate loop -- starting the while loop
raise notice 'this is the date %', Date; -- printing OUT what date im on
insert into tr_wi_2023.wi_rdim_week (
week_id,
week_code,
quarter_code,
calendar_year,
calendar_quarter
)
values(
ID, -- Quarter ID
Date, -- Week Code
CAST(DATE_PART(y,Date) AS CHAR(4)) + 'Q' + CAST(DATE_PART(qtr,Date) AS CHAR(1)), --Quarter_Code
DATE_PART(y,Date), --Calendar_Year
DATE_PART(qtr,Date) --Calendar_Quarter
);
Date := dateadd(w,1,Date); --increasing the date variable
ID := ID + 1; -- incresing the id variable
end loop;
raise info 'loop ended';
end;
$$;
call populating_wi_rdim_week();
-------
-- populate occupation table
--only 2-digit soc presently reported in PROMIS
select distinct substring(occupation, 1, 2) from ds_wi_dwd.promis p order by 1;
select distinct substring(occupation, 3, 8) from ds_wi_dwd.promis p;
--can join to soc_xwalk where soc_group = 'Major' without duplicating join
--when just using first two characters in soc_title from soc_xwalk
select distinct p.occupation, sx.soc_title
from ds_wi_dwd.promis p
left join ds_public_1.soc_xwalk sx
on substring(p.occupation, 1, 2) = substring(sx.soc_code, 1, 2) and sx.soc_group = 'Major'
order by 1;
--will take just first two characters from soc_code into reference table and limit xwalk to just rows where
-- soc_group = 'Major'
insert into tr_wi_2023.wi_rdim_occupation(
soc_code,
soc_title
)
select
substring(soc_code, 1, 2) as soc_code,
soc_title
from ds_public_1.soc_xwalk sx
where sx.soc_group = 'Major'
--- industry table
insert into tr_wi_2023.wi_rdim_industry(
code,
title,
description
)
select
code,
title,
description
from ds_public_1.naics_descriptions 03 Person Master
--MASTERING RULES: IF GENDER, RACE, ETHNICITY, OR BIRTH_DATE CHANGE FOR AN SSN OVER TIME
-- TAKE THE MOST COMMON VALUE. IN TIES, PRIORITIZE KNOWN VALUES OVER UNKNOWN
-- first will find number of ssn values this concerns
with mult as (
SELECT ssn
FROM ds_wi_dwd.promis p
group by ssn
having count(distinct(gender)) > 1 or count(distinct(race)) > 1 or count(distinct(ethnicity)) > 1 or count(distinct(birth_date)) > 1
)
select count(distinct(ssn))
from mult
--MASTERING CODE
insert into tr_wi_2023.wi_mdim_person(
ssn
,gender
,race
,ethnicity
,birth_date
)
with gender_tab as
(
select ssn
, gender
, ROW_NUMBER() over(partition by ssn order by count(*) desc,
--PRIORITIZE KNOWN GENDER 1 AND 2 OVER UNKNOWN 0 AND 3
case when gender = 0 then 2
when gender = 1 then 1
when gender = 2 then 1
when gender = 3 then 2
else 3 end
) as RowNum
from ds_wi_dwd.promis
group by ssn, gender
),
race_tab as
(
select ssn
, race
, ROW_NUMBER() over(partition by ssn order by count(*) desc,
--PRIORITIZE KNOWN RACE OVER UNKNOWN, UNKNOWN OVER NULL
case when race in (1,2,3,4,5,8) then 1
when race in (0, 6) then 2
when race is null then 3
else 4 end
) as RowNum
from ds_wi_dwd.promis
group by ssn, race
),
ethnicity_tab as (
select ssn
, ethnicity
, ROW_NUMBER() over(partition by ssn order by count(*) desc,
--PRIORITIZE KNOWN ETHNICITY OVER UNKNOWN, UNKNOWN OVER NULL
case when ethnicity in ('Y', 'N') then 1
when ethnicity = '*' then 2
when ethnicity is null then 3
else 4 end
) as RowNum
from ds_wi_dwd.promis
group by ssn, ethnicity
),
birthdate_tab as (
select ssn
, birth_date
, ROW_NUMBER() over(partition by ssn order by count(*) desc,
--PRIORITIZE KNOWN BIRTH_DATE OVER IMPOSSIBLE, NULL OVER IMPOSSIBLE
case when birth_date is null then 2
when extract(year from birth_date) > 2023 then 3
else 1 end
) as RowNum
from ds_wi_dwd.promis
group by ssn, birth_date
),
mastered_tab as (
select g.ssn, g.gender, r.race, e.ethnicity, b.birth_date
from gender_tab g
inner join race_tab r on g.ssn = r.ssn and g.RowNum = 1 and r.RowNum = 1
inner join ethnicity_tab e on g.ssn = e.ssn and e.RowNum = 1
inner join birthdate_tab b on g.ssn = b.ssn and b.RowNum = 1
),
--MAKE SURE WE GET EVERYONE IN WAGE RECORDS WHO DOESN'T SHOW UP IN PROMIS TOO
unique_ssn_wage as (
select distinct ssn
from ds_wi_dwd.ui_wage uw
)
--JOIN EVERYONE IN PROMIS TO EVERYONE IN WAGE WHO DOESN'T SHOW UP IN PROMIS TO GET FULL SET OF PEOPLE
select
coalesce(m.ssn, u.ssn) as ssn
, m.gender, m.race
, m.ethnicity
, m.birth_date
from mastered_tab m
full outer join unique_ssn_wage u
on m.ssn = u.ssn ;04 Fact Table Load
insert into tr_wi_2023.wi_fact_weekly_observation(
person_id
,week_ending_date
,week_id
,benefit_claimed
,benefit_yr_start
,normal_benefit_received
,ic_claim_date
,last_employer_naics
,last_employer
,eligible_benefit_amount
,earnings_during_wk
,entitlement
,veteran_status
,commuter
,education
,disability
,ic_type
,occupation
,program_type
,status_code
,stop_payment_indicator
,waiting_week
,res_zip
,employed_in_quarter
,calendar_year
,calendar_quarter
,primary_employer_id
,primary_employer_wages
,total_wages
,employer_count
)
--FOR HERE: SSN WHERE CLAUSE TO SPEED UP QUERY
--get all week/person combos
with person_quarter_combos as (
select wmp.ssn, wrw.week_code
from tr_wi_2023.wi_mdim_person wmp
cross join tr_wi_2023.wi_rdim_week wrw
--where wmp.ssn = 'REDACTED'
),
promis_info as (
select
p.ssn
,week_ending_date as claim_week
,effective_date_of_claim as benefit_yr_start
,case
when ic_type is null and monetarily_eligible = 'Y' and stop_payment_indicator = 'N' and waiting_week = 'N' and entitlement = 0 then 'Y'
else 'N'
end as normal_benefit_received
,ic_claim_date
,last_employer_naics
,ui_number as last_employer
,weekly_benefit_amount as eligible_benefit_amount
,earnings_during_wk
,entitlement
,veteran_status
,commuter
,education
,disability
,ic_type
,substring(occupation, 1, 2) as occupation
,program_type
,status_code
,stop_payment_indicator
,waiting_week
,res_zip
from ds_wi_dwd.promis p
),
--FOR NOW: IGNORE ALL WAGE RECORDS WHERE UI ACCOUNT NUMBER ISN'T ALL INTEGERS
Wage_Rank AS (
SELECT
w.ssn,
w.year,
w.quarter,
ROW_NUMBER() OVER(PARTITION BY w.ssn, w.year, w.quarter ORDER BY w.wage DESC) AS RANK,
w.wage,
w.ui_account
FROM ds_wi_dwd.ui_wage w
where w.wage > 0 and substring(w.ui_account,1,1) != 'M'
),
Primary_Employer_Wage AS (
select
WR.ssn,
WR.year,
WR.quarter,
WR.wage AS Primary_Employer_Wages,
WR.ui_account as Primary_Employer_ID
from Wage_Rank WR
WHERE
WR.RANK=1
),
All_Employer_Wage AS (
SELECT
WR.ssn,
WR.year,
WR.quarter,
COUNT(WR.ui_account) AS Employer_Count,
SUM(WR.wage) AS Total_Wages
FROM
Wage_Rank WR
GROUP BY
WR.ssn,
WR.year,
WR.quarter
)
select person.person_id
,pq.week_code as week_ending_date
,week.week_id
,case when pi.claim_week is null then 'N' else 'Y' end as benefit_claimed
,pi.benefit_yr_start
,case when pi.normal_benefit_received is null then 'N' else pi.normal_benefit_received end as normal_benefit_received
,pi.ic_claim_date
,pi.last_employer_naics
,pi.last_employer
,pi.eligible_benefit_amount
,pi.earnings_during_wk
,pi.entitlement
,pi.veteran_status
,pi.commuter
,pi.education
,pi.disability
,pi.ic_type
,pi.occupation
,pi.program_type
,pi.status_code
,pi.stop_payment_indicator
,pi.waiting_week
,pi.res_zip
,CASE WHEN PEW.ssn IS NULL THEN 'N' ELSE 'Y' END AS employed_in_quarter
,week.calendar_year
,week.calendar_quarter
,PEW.Primary_Employer_ID as primary_empoyer_id
,PEW.Primary_Employer_Wages as primary_employer_wages
,AEW.Total_Wages as total_wages
,AEW.Employer_Count as employer_count
from person_quarter_combos pq
join tr_wi_2023.wi_mdim_person person on (pq.ssn = person.ssn)
join tr_wi_2023.wi_rdim_week week on (pq.week_code = week.week_code)
left join promis_info pi on (pq.ssn = pi.ssn) and (pq.week_code = pi.claim_week)
LEFT JOIN Primary_Employer_Wage PEW ON (PEW.ssn=pq.ssn) AND (PEW.quarter=week.calendar_quarter) and (PEW.year = week.calendar_year)
LEFT JOIN All_Employer_Wage AEW ON (AEW.ssn=pq.ssn) AND (AEW.quarter=week.calendar_quarter) and (AEW.year = week.calendar_year)
order by person.person_id, pq.week_code;